Mailto: julio.cordero@medma.uni-heidelberg.de
-Instead of an expression of genes, we propose an enrichment score. Which combines the motif score of the TFs and the log10-Pvalue of the motif in the chromatin state. Each Cluster identified represents a group of PEAKS which contain a specific signature of MOTIFS from TFs. - Allow the identification of TFs in the neighborhood Zone. To understand the mechanism of how TFs, cooperate among them. -This Script was used with the Motifs associated with Histone marks. However, it could also be modified to input Motifs associated with ATAC-seq or ChIP-seq. In the case of ATAC-seq, the motifs coming from footprinting analysis or Homer motifs search are highly recommended.
workdir = "./"
setwd(workdir)
PTHA1="../03OUTPUT/"
dir.create(PTHA1)
PORT=PTHA1
TRY="_"
TRY1="02Ia_"
PROJECT="03a_TFinZONE_at_01dpci"
PVALUE=0.05
#### FOR VOLCANO ###
upcol<- "#B2182B" # magenta from PiyG
nc<- "#000000" # black
downcol<- "grey" # green from PiyG
CC= c(downcol, "#F7F7F7",upcol)
BASIC_COL="black"
CO3<- c("lightgrey","#B2182B")
CO_ALU3=c("#d7191c","#d8b365","#542788","grey", "#91bfdb", "grey","grey")
NAME33<- c("Mo_Name","Consensus","P-value","Log_P-value","q-value_Benja","Nor_of_Tar","Per_of_Tar","No_of_Tar_Backg","Per_of_Tar_Backg")
NAME34<- c("HOmer_NAME",NAME33)
mat_m<- data.frame("mask_500_FIND")
## Here you write the Path to the
WORK1="../01DATA_ORI/08MOTIFS_on_PEAKS/"
#WORK1="Y:/002ZF/000FIGURES_ANALY/00GITHUB_01HIS_Chroma_Factors/01HIS_Chrom_Factors_TEST/04MOtifs_Analy_as_SC_MOAC_Fig3_5/02Fig4_MO_MOAC_01dpci/01DATA_ORI/08MOTIFS_on_PEAKS/"
NAME_G2 <-data.frame(list.files(path=WORK1,pattern="*_FIND"))
colnames(NAME_G2)<- "ENH4"
#NAME_G2$GROUP <- gsub("CORR1_01d_", "", NAME_G2$ENH4)
NAME_G2$GROUP <- gsub("*_mask_500_FIND", "", NAME_G2$ENH4)
### this is important to check the SYMBOL
NAME_G22<- data.frame(c("00A_Ia","01Ia_A","02U_Ea","03U_Ia","04U_Pa"))
NAME_G2c<- as.character(t(NAME_G2[,2]))
#WORK11A11="../01DATA_ORI/07aMO_forMOAC/08TSS_Enh_w_01dpci_fHeatmap_ALL.xlsx"
WORK11A11="../01DATA_ORI/07aMO_forSeurat_ANALY/08TSS_Enh_w_01dpci_fHeatmap_ALL.xlsx"
MAT_1A= read_excel(WORK11A11)
#MAT_1A= read.delim(WORK11A11)
colnames(MAT_1A)[1]<- "SYMBOL"
dim(MAT_1A)
## [1] 326 11
##### Set resolution for CLustering
RES=0.5
## sequential:
## - args: function (..., envir = parent.frame())
## - tweaked: FALSE
## - call: NULL
## sequential:
## - args: function (..., workers = 16, envir = parent.frame())
## - tweaked: TRUE
## - call: plan("sequential", workers = 16)
##
## 00A_Ia_01dcpi 01Ia_A_01dcpi 02U_Ea_01dcpi 03U_Ia_01dcpi 04U_Pa_01dcpi
## 278786 117 6814 66147 12513
## [1] 3
b7<- AC_RE_VOL_SC_META
rownames(b7)<- b7$PositionID2
mat_c1<- mat_cm_TMM_BOTH_SC[,rownames(b7)]
rownames(mat_c1)<- mat_cm_TMM_BOTH_SC$SYMBOL
dim(mat_c1)
## [1] 207 3090
dim(b7)
## [1] 3090 5
smartseq2 <- CreateSeuratObject(mat_c1,project = "01d", meta.data = b7)
## png
## 2
arrangeQC_TF <- ggarrange(EC00,EC001, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="none")
print(arrangeQC_TF)
grid.arrange( plot1,plot2, ncol = 1,nrow = 2)
## Centering and scaling data matrix
## PC_ 1
## Positive: THRb, Hoxd12, Tbx5, Smad3, PR, Hoxd11, Eomes, Sox3, Olig2, Sox21
## Hoxa9, Hoxa11, Sox10, Hoxd10, Twist2, Smad2, GATA3, NeuroG2, Erra, TRPS1
## Nanog, CEBP, ERG, Smad4, EAR2, ETS1, Pdx1, BHLHA15, CDX4, PBX2
## Negative: Isl1, bHLHE41, Npas4, Gfi1b, Tbet, Tbx21, Otx2, CHR, Hoxa10, ZNF7
## Elk1, HNF6, Cux2, Unknown, PRDM10, SCL, IRF3, Oct6, Max, ELF1
## MYNN, HINFP, Tbox, CRE, STAT1, GATA, E2F3, TCFL2, JunD, NRF
## PC_ 2
## Positive: MNT, BMAL1, NPAS2, RUNX2, RUNX, TEAD, NPAS, MITF, Twist2, CLOCK
## RUNX1, CEBP, TEAD3, THRb, HIC1, Zic, PBX2, Atf3, Eomes, Hoxc9
## Sox3, TEAD2, Olig2, USF1, Usf2, Erra, CDX4, BHLHA15, SF1, Fos
## Negative: ETV4, Fli1, ETV1, GABPA, Elf4, Etv2, ERG, ELF3, EWS, EHF
## ETS1, ELF5, SPDEF, Elk4, Elk1, ETS, ELF1, SpiB, SCL, Npas4
## Meis1, bHLHE41, Isl1, FoxL2, Foxo1, Tbet, IRF8, Tbx21, GSC, Tgif1
## PC_ 3
## Positive: Isl1, Nanog, Tbet, Tbx21, Pitx1, Bapx1, Tbr1, Sox2, RARa, Etv2
## TRPS1, Hoxa10, Bcl6, Otx2, Gata2, Sox6, Gata1, EWS, Gfi1b, CHR
## ZNF7, Hoxa9, Gata4, Sox15, ELF3, ERG, Gata6, SCL, GATA3, ETS1
## Negative: Foxo3, FoxL2, Foxf1, MYB, FOXK1, AMYB, Npas4, Foxo1, HNF6, FOXP1
## Hoxd13, Foxa3, FOXA1, Cux2, Unknown, FOXK2, Elk4, ELF1, Cdx2, Elk1
## Foxa2, Oct6, BMYB, ETS, CRE, MYNN, CDX4, Meis1, Hoxa11, FOXM1
## PC_ 4
## Positive: BATF, Atf3, Fos, Fra1, JunB, Fra2, Fosl2, ETV1, ETV4, HIC1
## GABPA, Elf4, MYB, ETS1, Etv2, Max, ERG, Fli1, Elk4, RUNX2
## OCT, Pdx1, Hoxc9, Smad4, ETS, ELF3, ELF1, E2F3, PBX2, EHF
## Negative: FOXM1, FOXA1, Foxf1, FOXP1, Pitx1, Fox, FoxL2, FOXK2, SCL, Foxa2
## FOXK1, Foxo3, Isl1, GSC, Foxo1, Tbet, Tbx21, FoxD3, Otx2, Foxa3
## Nanog, Gata2, Bcl6, ZNF7, Gata1, TRPS1, Hoxa10, Hoxa13, Tbr1, CHR
## PC_ 5
## Positive: Hoxd13, CDX4, Hoxd11, Sox3, Cdx2, Hoxd12, MYB, Sox17, Hoxa11, ZNF711
## Smad3, Sox7, AMYB, Hoxd10, Elk4, Elf4, PAX5, HIF2a, Tbx5, Sox21
## Smad2, Smad4, Olig2, Sox10, Erra, NFIL3, THRb, RXR, PGR, Unknown
## Negative: Fra1, Fra2, Atf3, JunB, BATF, Fos, Fosl2, Fox, FOXP1, Foxa2
## FOXA1, FOXM1, Foxf1, Foxo3, FoxL2, FOXK2, FOXK1, Foxa3, Isl1, Pitx1
## Tbet, SCL, Tbx21, FoxD3, Otx2, GSC, STAT4, ZNF7, Bcl6, Foxo1
## Plot Pincipal component analysis PCA
grid.arrange( P1, ncol = 1,nrow = 1)
print(P2)
## png
## 2
set.seed(1)
smartseq2 <- FindNeighbors(smartseq2, dims = 1:10)
smartseq2 <- FindClusters(smartseq2, resolution = RES, random.seed= 1, algorithm=1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3090
## Number of edges: 94891
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8747
## Number of communities: 11
## Elapsed time: 0 seconds
CO<-brewer.pal(n = 12, name = "Paired")
COb<-brewer.pal(n = 8, name = "Dark2")
CO2b<-c("#E66100","#000000","#0affc2")
CO2<-c("#b2182b","#ef8a62","#67a9cf")
##EDIT Paired color
COe2<- c("#88CCEE", "#332288","#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC","#888888","#000000","#77ddcc","#df65b0","#CC79A7","#332288", "#E66100", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#A6CEE3", "#FDBF6F", "#FF7F00","#CAB2D6","#F0E442","#B15928","#7570B3","#E7298A","#666666","#c241f8" ,"mediumaquamarine","#882E72",'#1965B0',"#bf812d")
A1<- c('#b2182b','#d6601d','#f4a582')
A2<- c('#8c510a','#bf812d','#F6C141')
A3<- c("#882E72", "#AE76A3","#D1BBD7")
A4<- c('#01665e','#90C987','#CAE0AB')
A5<- c ('#1965B0','#4393c3',"#92c5de")
CO_ALU2=c("#d7191c","#d8b365","#542788","#abd9e9", "#2c7bb6", "#4d4d4d","grey")
CO_ALU=c("#d7191c","#542788","#d8b365","#abd9e9", "#2c7bb6", "#4d4d4d","grey")
CO_ALU3=c("#d7191c","#d8b365","#542788","grey", "#91bfdb", "grey","grey")
COe<- c(A1,A2,A3,A4,A5)
smartseq2 <- RunUMAP(smartseq2, dims = 1:30, n.neighbors = 30)
smartseq2 <- RunTSNE(smartseq2, dims = 1:20, check_duplicates = FALSE)
##### These PLots are printed later in the Script. here are to check #####
#DimPlot(smartseq2, reduction = "umap", label = T,cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne", label = T,cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "umap",group.by ='GROUP',cols = CO_ALU,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP',cols = COe,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne", label = T)
#DimPlot(smartseq2, reduction = "tsne",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
#DimPlot(smartseq2, reduction = "umap",group.by ='GROUP',cols = CO_ALU3,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP',cols = CO_ALU3,pt.size=0.7)
{pdf(file=paste(PTHA1,"01Umap_tsne_",PROJECT,"CLU_PC10_Dim30",".pdf",sep=""), width=8, height=3)
P00a<- DimPlot(smartseq2, reduction = "umap", label = T,cols = COe2,pt.size=0.7)
P01a<- DimPlot(smartseq2, reduction = "umap", label = F,cols = COe2,pt.size=0.7)
P00at<- DimPlot(smartseq2, reduction = "tsne", label = T,cols = COe2,pt.size=0.7)
P01at<- DimPlot(smartseq2, reduction = "tsne", label = F,cols = COe2,pt.size=0.7)
P03<-DimPlot(smartseq2, reduction = "umap",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
P03a<- DimPlot(smartseq2, reduction = "umap",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
P03t<-DimPlot(smartseq2, reduction = "tsne",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
P03at<- DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
arrange1 <- ggarrange(P00a,P01a, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1t <- ggarrange(P00at,P01at, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1c <- ggarrange(P03, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
print(arrange1t)
print(arrange1)
}
dev.off()
## png
## 2
{pdf(file=paste(PTHA1,"01Umap_tsne_",PROJECT,"CLU_PC10_Dim30_by_STAGE",".pdf",sep=""), width=12, height=3)
arrange1bt <- ggarrange(P03t, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1ca <- ggarrange(P03a, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1cat <- ggarrange(P03at, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
}
dev.off()
## png
## 2
print(arrange1t)
print(arrange1)
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
## png
## 2
print(arrange3)
print(arrange3t)
print(arrange2)
grid.arrange( P20,P201, ncol = 1,nrow = 2)
grid.arrange( P05ah_bino1, ncol = 1,nrow = 1)
grid.arrange( P05ah_bino, ncol = 1,nrow = 1)
##Save Seurat Object - Important for further modification of the Plots. You do not need to start from the start of the script.
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- GetAssay(smartseq2,assay = "RNA")
DATA_SC1<- data.frame(DATA_SC@scale.data)
DATA_SC2<- data.frame(DATA_SC@scale.data)
#dim(DATA_SC1)
META_SEU<- data.frame(smartseq2@meta.data)
colnames(DATA_SC1)<- META_SEU$PositionID
colnames(META_SEU)[4]<- "SYMBOL_TF"
GENES001<- c("Twist2","Smad4", "Nanog","Smad2","Fos","Sox9","Sox10","JunB")
GENES002 <- c("Pitx1","Isl1", "Mef2b","FOXA1","TEAD3","GATA3", "Gata6", "Mef2b")
GENES003 <- c("Eomes","Oct4","Foxh1","CUX1","Bapx1", "Elk4","Foxo1","Fli1")
GENES004<- c("Rbp7","Rgcc", "Fabp4","Egfl7","Flt1", "Cd36","Nrp1","Tpm1", "Rbp1")
LE=10
SI=10
YL3="Norm binding of TFs"
XL3="clusters"
P1 <- VlnPlot(smartseq2, GENES001, stack = TRUE, sort = F, flip = T,pt.size=0.02)+geom_point(size = .4,colour = "darkgrey") +scale_fill_manual(values = COe2)+
theme(legend.position = "none",plot.title= element_text(color=BASIC_COL, size=LE, face="bold"), axis.text.y= element_text(size=LE,face="bold"),axis.text.x = element_text(size=SI, face="bold"), axis.title.x =element_text(color=BASIC_COL, size=LE, face="bold"), axis.title.y = element_text(color=BASIC_COL, size=LE, face="bold")) +labs(title=paste(PROJECT, "SEVERAL_TFs",sep="" ) , x=XL3, y = YL3)
P1CM <- VlnPlot(smartseq2, GENES002, stack = TRUE, sort = F, flip = T,pt.size=0.02)+geom_point(size = .4,colour = "darkgrey") +scale_fill_manual(values = COe2)+
theme(legend.position = "none",plot.title= element_text(color=BASIC_COL, size=LE, face="bold"), axis.text.y= element_text(size=LE,face="bold"),axis.text.x = element_text(size=SI, face="bold"), axis.title.x =element_text(color=BASIC_COL, size=LE, face="bold"), axis.title.y = element_text(color=BASIC_COL, size=LE, face="bold")) +labs(title=paste(PROJECT, "SEVERAL_TFs",sep="" ) , x=XL3, y = YL3)
P1CM3 <- VlnPlot(smartseq2, GENES003, stack = TRUE, sort = F, flip = T,pt.size=0.02)+geom_point(size = .4,colour = "darkgrey") +scale_fill_manual(values = COe2)+
theme(legend.position = "none",plot.title= element_text(color=BASIC_COL, size=LE, face="bold"), axis.text.y= element_text(size=LE,face="bold"),axis.text.x = element_text(size=SI, face="bold"), axis.title.x =element_text(color=BASIC_COL, size=LE, face="bold"), axis.title.y = element_text(color=BASIC_COL, size=LE, face="bold")) +labs(title=paste(PROJECT, "SEVERAL_TFs",sep="" ) , x=XL3, y = YL3)
P1CM3
print(P1CM)
pdf(file=paste(PTHA1,"09_", PROJECT,"_CLU_vs_Ex_VIol_groups_STACKED",".pdf",sep=""), width=10, height=15)
print(P1)
arrange1CM <- ggarrange(P1CM, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="top")
arrange1CM3 <- ggarrange(P1CM3, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="top")
dev.off()
## png
## 2
print(arrange1CM)
print(arrange1CM3)
## png
## 2
print(arrange1CM41)
print(arrange1CM42)
print(arrange1CM43)
## png
## 2
print(arrange12)
print(P04b12)
print(P04c12)
# Code for UMAP and TSNE plots
#FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3)
P05a<-FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3)
P05b<-FeaturePlot(smartseq2, features =GENES002,min.cutoff=0,max.cutoff=1,cols = CO3)
P05c<-FeaturePlot(smartseq2, features =GENES003,min.cutoff=0,max.cutoff=1,cols = CO3)
{pdf(file=paste(PTHA1,"09_",PROJECT,"_CLU_vs_Exp_UMMAP_groups",".pdf",sep=""),width=8, height=6)
print(P05a)
print(P05b)
print(P05c)
}
dev.off()
## png
## 2
P05a1<-FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3,reduction="tsne")
P05b1<-FeaturePlot(smartseq2, features =GENES002,min.cutoff=0,max.cutoff=1,cols = CO3,reduction="tsne")
P05c1<-FeaturePlot(smartseq2, features =GENES003,min.cutoff=0,max.cutoff=1,cols = CO3,reduction="tsne")
{pdf(file=paste(PTHA1,"09_",PROJECT,"_CLU_vs_Ex_TSNE_groups",".pdf",sep=""),width=8, height=6)
print(P05a1)
print(P05b1)
print(P05c1)
}
dev.off()
## png
## 2
print(P05a)
print(P05b)
print(P05c)
print(P05a1)
print(P05b1)
print(P05c1)